cd C:\Users\chausman\Dropbox\Legacy_utility_costs\Data_package_to_post
 
////////////////////////
//EIA 176 DATA
insheet using data/eia176/type_of_operations_and_sector_176.csv, names clear
foreach var of varlist * {
  local a = subinstr("`var'","br","_",.)
  rename `var' `a'
}
foreach var of varlist investor_owned municipally_owned privately_owned cooperative ///
	other_ownership interstate intrastate storage_operator sng_operator producer ///
	gatherer lng_operator lng_marine_terminal lng_public cng_public other_specify {
  local a = "`var'"
  rename `var' type_`a'
  replace type_`var'="1" if type_`var'=="X"
  destring type_`var', replace
  replace type_`var'=0 if type_`var'==.
}
foreach var of varlist residential_* {
  local a = subinstr("`var'","residential_","resi_",.)
  rename `var' `a'
}
foreach var of varlist commercial_* {
  local a = subinstr("`var'","commercial_","comm_",.)
  rename `var' `a'
}
foreach var of varlist industrial_* {
  local a = subinstr("`var'","industrial_","indu_",.)
  rename `var' `a'
}
foreach var of varlist electric_* {
  local a = subinstr("`var'","electric_","elec_",.)
  rename `var' `a'
}
foreach var of varlist vehicle_fuel_* {
  local a = subinstr("`var'","vehicle_fuel_","vehi_",.)
  rename `var' `a'
}
foreach var of varlist *_transported_* {
  local a = subinstr("`var'","transported","trans",.)
  rename `var' `a'
}
foreach s in resi comm indu elec vehi{
foreach g in total sales trans{
foreach v in volume price revenue custom {
	capture rename `s'_`g'_`v' `s'_`g'_`v'
	//capture because eg total_price doesn't exist
}
}
}
rename resi_trans_revenu resi_trans_revenue
drop vehi* 
rename company company_id_eia
rename companyname company_name
///IMPROVING IOU/MUNI DESIGNATION (which appears to have measurement error in raw data):
//sort company_id year
//br company_id company_name year type*
bysort company_id_eia: egen iou=mode(type_investor_owned), maxmode //maxmode will give 1 if tie
bysort company_id_eia: egen muni=mode(type_municipally_owned), maxmode //maxmode will give 1 if tie
bysort company_id_eia: egen coop=mode(type_cooperative), maxmode //maxmode will give 1 if tie
	//we will put these in with munis
bysort company_id_eia: egen private=mode(type_privately_owned), maxmode //maxmode will give 1 if tie	
	//we will put these in with ious
///OVERLAP between these four categories:
	//br if iou+muni==2 //GOOD; VERY FEW
		//this is an old entry that appears to be muni. current website won't help because it may have been privatized
		replace iou=0 if company_id_eia=="17695308TX"	
	//br if iou+coop==2 //GOOD; NONE
	//br if iou+private==2 //expected- these we will treat the same
	//br if muni+coop==2 //GOOD ;NONE
	//br if muni+private==2 //GOOD; NONE
	//br if coop+private==2 //GOOD; NONE
////PRIVATE NOT IOU? look at anything listed as "privately owned" and not iou==1
	tab company_name if private==1 & iou+muni+coop==0 & resi_total_custom!=0 & resi_total_custom!=.
	unique company_id if iou==1 & resi_total_custom!=0 & resi_total_custom!=. //354
	unique company_id if private==1 & iou+muni+coop==0 & resi_total_custom!=0 & resi_total_custom!=. //108
	disp 108/(108+354) //fine; when we lump these together, most will be traditional publicly-traded IOUs
	//and note I think some of these are actually publicly traded 
/////LARGE LDCS THAT ARE NOT IOUS - web search to verify these should be ious
	tab company_name if resi_total_custom>500000 & iou+private==0
	//br if company_name=="DTE GAS COMPANY"
	replace iou=1 if company_name=="DTE GAS COMPANY"
	tab company_name if resi_total_custom>100000 & iou+private==0
	//br if company_name=="CASCADE NAT GAS CORP" //note two states
	/*Cascade is a subsidiary of MDU Resources Group, Inc., a multidimensional natural resources enterprise traded on the New York Stock Exchange as “MDU.” For more information about MDU Resources, visit the company’s website at www.mdu.com. https://www.cngc.com/in-the-community/about-us/ */
	replace iou=1 if company_name=="CASCADE NAT GAS CORP"
	/*In 1887, Indianapolis civic leaders came up with the idea of operating a natural gas company as a Public Charitable Trust, solely for the benefit of customers and the community. Today, this Trust lives on as Citizens Energy Group, a broad-based utility service company, providing natural gas, thermal energy, water, and wastewater services to about 800,000 people and thousands of businesses in the Indianapolis area.  At Citizens, our vision is to fulfill the promise of the Trust to serve our customers and communities with unparalleled excellence and integrity. https://www.citizensenergygroup.com/Our-Company/About-Citizens*/
	/*As a citizen of Colorado Springs, you own your utility. And that means you have a voice in utilities decisions*/
	/*Today, MLGW is the nation's largest three-service municipal utility, serving more than 429,000 customers. Since 1939, MLGW has met the utility needs of Memphis and Shelby County residents by delivering reliable and affordable electricity, natural gas and water service.*/
	/*The Metropolitan Utilities District, or M.U.D., is the political subdivision and public corporation of the U.S. State of Nebraska that operates the drinking water and natural gas systems for Omaha, Nebraska and surrounding areas. M.U.D. is the only metropolitan utilities district in the State of Nebraska, and the fifth largest public natural gas utility in the U.S.[2]*/
	replace iou=1 if company_name=="SIERRA PACIFIC POWER COMPANY"
//////"CITY OF" AND "MUNI" THAT ARE NOT MUNI==1
	sum type_* iou muni if strmatch(company_name,"*CITY OF*")
	//99.4% muni - excellent
	//96.7% type_muni - excellent
	tab company_id_eia if strmatch(company_name,"*CITY OF*") & muni==0
	//br if company_id=="17614062AL"
	/*https://www.unionspringsherald.com/news/article_e8ad2142-d7ad-11e8-8ffe-af9398eecb0e.html 1,300 residents or customers rely on the utilities system for water, sewer and gas services.*/
	replace muni=1 if  company_id=="17614062AL"
	//br if company_id=="17648970TX"
	/*The population was 19 at the 2010 census, making it the municipality with the smallest population in Texas.[5]*/
	replace muni=1 if  company_id=="17648970TX"
		replace private=0 if company_id=="17648970TX"
	//br if company_id=="17695326KS"
	//0 resi customers
	sum type_* iou muni if strmatch(company_name,"*MUNICIP*")
	//97% muni - excellent
	//96% type_muni - excellent
	//tab company_id_eia if strmatch(company_name,"*MUNICIP*") & muni==0
	//br if company_id=="17600039IL"
	replace muni=1 if company_id=="17600039IL"
	//br if company_id=="17695404TX" //no resi customers
////////OTHER_DESCR variable
	tab other_descr if iou==1 & resi_total_custom!=0 & resi_total_custom!=. //fine
	tab other_descr if muni==1 & resi_total_custom!=0 & resi_total_custom!=. //fine
	tab other_descr if muni==0 & iou==0 & resi_total_custom!=0 & resi_total_custom!=. //fine
		//almost all utility irrigation district, county, etc -- nothing that looks like an IOU
//////////NOT YET CLASSIFIED	
	gsort -resi_total_custom
	unique company_id if iou+muni+private+coop==0 & resi_total_custom!=0 & resi_total_custom!=. //110
	//br if iou+muni+private+coop==0 & resi_total_custom!=0 & resi_total_custom!=.
	replace iou=1 if company_name=="ARKANSAS OKLAHOMA GAS CORPORATION"
		/*AOG has been owned by the family of Witt Stephens, Sr. for 72 years.*/
	replace muni=1 if company_name=="YORK CTY NATURAL GAS AUTHORITY"
		/*YCNGA was created by Act of the South Carolina General Assembly in 1954. It is therefore a political subdivision of the State that operates as a not-for-profit corporation and is governed by a ten-member Board.*/
	replace muni=1 if company_name=="MIDDLE TENNESSEE UTIL DIST"
		/*The Middle Tennessee Utility District of Cannon, Cumberland, DeKalb, Putnam, Rhea, Rutherford, Smith, Warren, White and Wilson Counties, Tennessee was created in July of 1955 as a municipal corporation pursuant to the Utility District Act of 1937, to provide natural gas service to residents, businesses and industries within its boundaries.*/
	replace iou=1 if company_name=="KANSAS GAS SVS CO" & state=="OK"
		/*Kansas Gas Service is a division of ONE Gas, Inc. (NYSE: OGS), a natural gas distribution company and the successor to the company founded in 1906 as Oklahoma Natural Gas Company, which became ONEOK, Inc. (NYSE: OKE) in 1980.*/
	replace muni=1 if company_name=="POWELL CLINCH UTILITY DISTRICT"
		/*Powell Clinch Utility District (PCUD) is a not-for-profit municipal utility serving portions of Anderson and Campbell counties. */
	replace muni=1 if company_name=="WILSON ENERGY" & state=="NC"
		/*Wilson Energy is the fourth-largest municipal electric system in North Carolina,*/
	replace muni=1 if company_name=="JAY UTILITIES AUTHORITY" & state=="OK"		
		/*Jay Utility Authority provides natural gas to customers in city limits and along Hwy 59 north of town. https://cityofjay.city/gas*/	
	replace muni=1 if company_name=="NAVAJO TRIBAL UTILITY AUTHORITY"
		/*not-for-profit, successful tribally-owned enterpris*/
	replace muni=1 if company_name=="POINTE COUPEE PARISH PUBLIC UTIL"
		/*http://pcparish.org/pointe-coupee-parish-gas-utilities-notice-2021/ */
	replace muni=1 if company_name=="ROBSTOWN UTIL SYS"
		/*Robstown Utility Systems is a not for profit entity established by the citizens of Robstown to create and maintain an effective System providing reliable electric, gas and wastewater */
	replace muni=1 if company_name=="NORTHEAST MS NATURAL GAS DISTRICT"
			/*Northeast MS Natural Gas District originally started as Mantachie Natural Gas District. Mantachie Natural Gas District was created by the State Legislature in 1961.  */
	sum iou muni coop if strmatch(company_name,"*UTIL*DIST*") & resi_total_custom!=0 & resi_total_custom!=.
	replace muni=1 if strmatch(company_name,"*UTIL*DIST*") & resi_total_custom!=0 & resi_total_custom!=.
	/*
	OKALOOSA GAS DISTRICT
	FORT HILL NAT GAS AUTH
	FORT COBB FUEL AUTHORITY
	CARDINAL BLUEFIELD GAS CO	
	CHICKASAWHAY NAT GAS	
	RATON NATURAL GAS	
	PG ENERGY DIV OF SOUTHERN UNIO
	CHELSEA GAS AUTHORITY
	LEANN GAS COMPANY	
	EASTERN STATES OIL & GAS INC	
	LIVINGSTON GAS & UTILITY CO
	ST AMANT GAS CO	
	WYOMING INDUSTRIAL GAS CO	
	EAST KENTUCKY UTILITIES	
	not obvious on websites, although type seems clear from names for some
	STOPPED AFTER THESE - EVERYTHING ELSE HAS <1000 RESI CUSTOMERS*/	

unique company_id if iou+private==1 & resi_total_custom!=0 & resi_total_custom!=. //459
unique company_id if iou+private==1 & resi_total_custom!=0 & resi_total_custom!=. & year==2013 
	//323 -- good; not too different from below
unique company_id if iou+private==0 & resi_total_custom!=0 & resi_total_custom!=. //1116
unique company_id if iou+private==0 & year==2013 & resi_total_custom!=0 & resi_total_custom!=. 
	//988 - good; not too different from below
unique company_id if muni+coop==1 & resi_total_custom!=0 & resi_total_custom!=. //1032
unique company_id if muni+coop==1 & resi_total_custom!=0 & resi_total_custom!=. & year==2013
	//952 -- good; not too different from below
	
sum resi_total_custom if iou+private==1 & year==2013
	disp r(mean)*r(N)/10^6
	//62 X 10^6
sum resi_total_custom if iou+private==0 & year==2013
	disp r(mean)*r(N)/10^6	
	//4.7 x 10^6
/*GOOD ROUGHLY MATCHES	import excel using data/aga/2019-fields.xlsx, firstrow clear
tab CompanyType
gen resicust=ResidentialSalesCustomers+ResidentialTransportationCusto
tab CompanyType if resicust!=0
                  Company Type |      Freq.     Percent        Cum.
-------------------------------+-----------------------------------
           Interstate Pipeline |          9        0.71        0.71
           Intrastate Pipeline |          9        0.71        1.42
    Investor Owned Distributor |        279       22.00       23.42
Municipally Owned Distribution |        943       74.37       97.79
                         Other |         28        2.21      100.00
-------------------------------+-----------------------------------
                         Total |      1,268      100.00
table CompanyType if resicust!=0, contents(sum resicust)
                  Company Type | sum(resicust)
-------------------------------+--------------
           Interstate Pipeline |         30115
           Intrastate Pipeline |         60232
    Investor Owned Distributor |      6.56e+07
Municipally Owned Distribution |       4617673
                         Other |         33987 */	
foreach v in iou private muni coop{
	rename `v' `v'_narrow
}
gen iou=max(iou, private)
	replace iou=0 if iou==. //puts muni, coop, and "other" into one category
	
compress
sum resi_total_volume iou muni if resi_total_cust==0 | resi_total_cust==. 
	//almost all have missing volume
	//br if (resi_total_cust==0 | resi_total_cust==.) & (resi_total_volume==0 | resi_total_volume==.)
drop if (resi_total_cust==0 | resi_total_cust==.) & (resi_total_volume==0 | resi_total_volume==.)
//br if iou==0 & muni==0
	sum resi_total_cust if iou==0 & muni==0 & private==0 & coop==0
		local temp1=r(mean)*r(N)
	sum resi_total_cust
		local temp2=r(mean)*r(N)
	disp `temp1'/`temp2' //0.001
	sum resi_total_cust if iou==0 & muni==0 & private==0 & coop==0
		local temp1=r(N)
	sum resi_total_cust
		local temp2=r(N)
	disp `temp1'/`temp2' //0.026
	drop other_descr
drop type* 
drop losses
order company_id company_name year state iou muni private coop resi_* comm_* indu_* elec_*
save data/eia176/eia176, replace



////////////////////////////
//CITYGATE PRICES
//volumes for quantity weighting
import excel using data/eia/NG_SUM_LSUM_A_EPG0_VGT_MMCF_M.xls, ///
	sheet("Data 1") cellrange(a2) firstrow clear //natural gas delivered to consumers
reshape long N3060, i(Sourcekey) j(state) string
replace state=substr(state,1,2)
rename N3060 quantity_total
gen date=date(Sourcekey,"DMY")
	gen year=year(date)
	gen month=month(date)
	drop if Sourcekey==""
	drop date Sourcekey
destring quantity_total, force replace
drop if quantity_total==.
save data/eia/state_volumes_total, replace
	//only starting in 2001

import excel using data/eia/NG_SUM_LSUM_A_EPG0_VCS_MMCF_M.xls, ///
	sheet("Data 1") cellrange(a2) firstrow clear //commercial volume
reshape long N3020, i(Sourcekey) j(state) string
replace state=substr(state,1,2)
rename N3020 quantity_comm
gen date=date(Sourcekey,"DMY")
	gen year=year(date)
	gen month=month(date)
	drop if Sourcekey==""
	drop date Sourcekey
destring quantity_comm, force replace
drop if quantity_comm==.
merge 1:1 state year month using data/eia/state_volumes_total, nogen
save data/eia/state_volumes, replace

import excel using data/eia/NG_SUM_LSUM_A_EPG0_VRS_MMCF_M.xls, ///
	sheet("Data 1") cellrange(a2) firstrow clear //resi volume
reshape long N3010, i(Sourcekey) j(state) string
replace state=substr(state,1,2)
rename N3010 quantity_resi
gen date=date(Sourcekey,"DMY")
	gen year=year(date)
	gen month=month(date)
	drop if Sourcekey==""
	drop date Sourcekey
destring quantity_resi, force replace
drop if quantity_resi==.
merge 1:1 state year month using data/eia/state_volumes, nogen
save data/eia/state_volumes, replace

//monthly citygate
import excel using data/eia/ng_pri_sum_a_epg0_pg1_dmcf_m.xls, ///
	sheet("Data 1") cellrange(a2) firstrow clear
reshape long N3050, i(Sourcekey) j(state) string
replace state=substr(state,1,2)
rename N3050 citygate_nominal_dmcf
gen date=date(Sourcekey,"DMY")
	gen year=year(date)
	gen month=month(date)
	drop if Sourcekey==""
	drop date Sourcekey
drop if strmatch(citygate,"*Citygate Price*")
destring, replace
save data/eia/citygate_monthly, replace

//eia-reported annual citygate
import excel using data/eia/ng_pri_sum_a_epg0_pg1_dmcf_a.xls, ///
	sheet("Data 1") cellrange(a2) firstrow clear
reshape long N3050, i(Sourcekey) j(state) string
replace state=substr(state,1,2)
rename N3050 citygate_nominal_dmcf
gen date=date(Sourcekey,"DMY")
	gen year=year(date)
	gen month=month(date)
	drop if Sourcekey==""
	drop date Sourcekey
drop month
drop if strmatch(citygate,"*Citygate Price*")
destring, replace
drop if city==.
save data/eia/citygate_annual, replace

//merging all of these
use data/eia/state_volumes, clear
merge 1:1 state year month using data/eia/citygate_monthly, nogen
bysort state year: egen citygate_alt_simple_nominal=mean(citygate_nominal)
	replace citygate_alt_simple_nominal=. if citygate_alt_simple_nominal==0
bysort state year: egen citygate_alt_wtall_nominal=wtmean(citygate_nominal), weight(quantity_total)
	replace citygate_alt_wtall_nominal=. if citygate_alt_wtall_nominal==0
bysort state year: egen citygate_alt_wtresi_nominal=wtmean(citygate_nominal), weight(quantity_resi)
	replace citygate_alt_wtresi_nominal=. if citygate_alt_wtresi_nominal==0
bysort state year: egen citygate_alt_wtcomm_nominal=wtmean(citygate_nominal), weight(quantity_comm)
	replace citygate_alt_wtcomm_nominal=. if citygate_alt_wtcomm_nominal==0
keep state year citygate_alt*
duplicates drop
merge 1:1 state year using data/eia/citygate_annual, nogen
keep if year>=1997 //our sample
sum city*
pwcorr city*
//highly correlated across all four
//reported annual citygate is most closely correlated with the residential-weighted one
//assume DC has Marylan'ds citygate price
sum citygate* if state=="DC" //all blank
	drop if state=="DC" 
	expand 2 if state=="MD", gen(temp)
	replace state="DC" if state=="MD" & temp==1
	drop temp
	isid state year
save data/eia/citygate, replace


////////////////////////
//PHMSA DATA
forval v=1990(1)2009{
	import excel using data/phmsa/annual_gas_distribution_`v'.xlsx, ///
		firstrow cellrange(a1) allstring clear
	foreach x in DIM_1 DIM_2 DIM_3 DIM_4 DIM_5 DIM_6 DIS_1 DIS_2 DIS_3 DIS_4 DIS_5 DIS_6 {
		//note we don't actually use these. ductile iron miles of mains in the system at end of year, by diameter.
		capture rename `x'Integer `x'
	}
	save data/phmsa/annual_gas_distribution_`v', replace
}
clear all
forval v=1990(1)2009{
	append using data/phmsa/annual_gas_distribution_`v'
}
drop RPTID
rename YR year
	replace year="19"+year if length(year)==2
		//br if year==""
		drop if year==""
	rename NAME operator_name
	rename OPID operator_id_phmsa
		replace operator_id_phmsa=OPERATOR_ID if operator_id_phmsa==""
		drop OPERATOR_ID
	drop OSTREET OCITY OCOUNTY OZIP
	drop HSTREET HCITY HCOUNTY HZIP
	rename OSTATE office_address_state
		replace office_address_state=ST if office_address_state==""
		replace office_address_state=OFSTATE if office_address_state==""
		drop ST OFSTATE
	rename HSTATE hq_address_state	
		replace hq_address_state=HST if hq_address_state==""
		replace hq_address_state=HQSTATE if hq_address_state==""
		drop HST HQSTATE
	rename STOP operating_state
//"The following fields are the miles of mains in the system at end of year, by material."	
//for 1990-2009, there are four possible versions of the "totals" variables:
//B1MT, the rowtotal of individual B1M variables (miles by type), TOTM, and rowtotal of TOT* (miles by diameter)
//any of these four might have reporting errors, but these should be infrequent
destring B1M_* B1MT, replace	
	rename B1MT mmiles_total_version1a 
	egen mmiles_total_version1b=rowtotal(B1M_*)
	compare mmiles_total_version1a mmiles_total_version1b //identical for 95%
	corr mmiles_total_version1a mmiles_total_version1b //0.999
	sum mmiles_total_version1a mmiles_total_version1b
	/*br year operator* mmiles* B1M* if abs(mmiles_total_version1a-mmiles_total_version1b)>2 ///
		& abs(mmiles_total_version1a-mmiles_total_version1b)!=.*/
		//very very few, and I think the main discrepancy one won't show up in our sample
	drop B1M_*
rename B1ST num_srvs_total_version1 
/*The following fields are the totals for the miles of mains in the system at end of year, 
by material, by diameter.*/
destring TOTM_* TOTMT, replace	
	rename TOTMT mmiles_total_version2a
	egen mmiles_total_version2b=rowtotal(TOTM_*)
	compare mmiles_total_version2a mmiles_total_version2b
	corr mmiles_total_version2a mmiles_total_version2b
	sum mmiles_total_version2a mmiles_total_version2b
	/*br year operator* mmiles* TOTM* if abs(mmiles_total_version2a-mmiles_total_version2b)>2 ///
		& abs(mmiles_total_version2a-mmiles_total_version2b)!=.*/
		//very very few
	drop TOTM_*
rename AVER average_length
rename TOTST num_srvs_total_version2
keep year operator* *state mmiles* num_srvs*
sum mmiles*
pwcorr mmiles*
//very similar. least correlated is 2b (adding up miles by diameter)
compress
save data/phmsa/annual_gas_distribution_1990_2009, replace



forval v=2010(1)2019{
	import excel using data/phmsa/annual_gas_distribution_`v'.xlsx, ///
		firstrow cellrange(a3) allstring clear
	save data/phmsa/annual_gas_distribution_`v', replace
}
clear all
forval v=2010(1)2019{
	append using data/phmsa/annual_gas_distribution_`v'
}
foreach var of varlist * {
  local a :  variable label `var'
  local b = lower("`a'")
  rename `var' `b'
}
rename num_srvcs_total num_srvs_total
sum if report_year=="" & report_number==""
	drop if report_year=="" & report_number==""
drop datafile_as_of
drop report_number supplemental_number
rename report_year year
rename operator_id operator_id_phmsa
rename stop operating_state
keep year operator* *state mmiles_total* num_srvs_total*
compress
save data/phmsa/annual_gas_distribution_2010_2019, replace


use data/phmsa/annual_gas_distribution_1990_2009, clear
append using data/phmsa/annual_gas_distribution_2010_2019
destring, replace
foreach v in operator_name office_address_state hq_address_state operating_state{
	replace `v'=trim(`v')
}
sort operator_id_phmsa year
rename operating_state state
drop if num_srvs_total_version1==0 & num_srvs_total_version2==0 
replace state=upper(state)
//fixing some incorrects and missings by hand, only if no ambiguity over multi-state companies
//these are found by running the next ten or so lines, then browse if state==""
replace state="CA" if state=="LA" & operator_id_phmsa==18480
	replace state="TX" if state=="TN TX" & operator_id_phmsa==1720
	replace state="AL" if state=="LA" & operator_id_phmsa==2270
	replace state="TX" if state=="TN TX" & operator_id_phmsa==2308
	replace state="MS" if (state=="MI" | state=="NC") & operator_id_phmsa==2336
	replace state="NY" if state=="NC NY" & operator_id_phmsa==5160
	replace state="AL" if state=="AK" & operator_id_phmsa==5700
	replace state="NM" if state=="" & operator_id_phmsa==7285
	replace state="TX" if state=="TN TX" & operator_id_phmsa==7680
	replace state="AL" if state=="AK" & operator_id_phmsa==11864
	replace state="OK" if state=="OH OK" & operator_id_phmsa==12546
	replace state="NC" if state=="" & operator_id_phmsa==13570
	replace state="MI" if state=="MT" & operator_id_phmsa==15845
replace state=office_address_state if state=="" & office_address_state==hq_address_state
	//applies to <.1% of observations
	//1% still missing. can't use other years, because the unique identifier is id/state, not just id
	gen temp1=1 if state!=""
	encode state, gen(temp2) //so i can calculate sd, a quick way of finding non-uniqueness
	replace temp2=. if temp1==. //i don't worry about non-uniqueness if it was missing
	bysort operator_id: egen temp3=sd(temp2)
	replace state=office_address_state if state=="" & hq_address_state=="" & temp3==0
	replace state=office_address_state if state=="" & hq_address_state=="" & temp3==.
		//browse - these don't look like multi-state companies; they look like munis.
		//but temp3 ensures they are not multi-state
		//this fills in state for 416+16 observations
	drop temp1 temp2 temp3
	drop if state=="" //only applies to 22. can't be merged with EIA, which is at state level.
duplicates drop //some duplicate entries. might be because they sent in a corrected form, which didn't apply to the variables we kept
	duplicates tag state operator_id_phmsa year, gen(dup)
		//occasionally just a name change, but usually different miles observations and i don't know which is right
	keep if dup==0 //96%
	drop dup
compress
save data/phmsa/annual_gas_distribution_1990_2019, replace




//////////////////////
//CPI 
import excel using data/cpi/CPALTT01USA661S.xls, cellrange(a11) firstrow clear
gen year=year(obs)
rename CPA cpi
keep year cpi
isid year
save data/cpi/cpi, replace	

sum cpi if year==2019
global cpi2019=r(mean)



/////////////////////
//FIPS
import excel using "data/fips/state codes.xlsx", firstrow clear
rename StateAbbr state_abbr
rename FIPS fips
rename StateName state_name
isid fips
save data/fips/state_fips, replace


////////////////
//REGIONS AND DIVISIONS
import excel using data/census/state_geocodes_v2011.xls, firstrow cellrange(a6) clear
rename Region region
rename Division division
rename StateFIPS fips
drop if fips=="00" //region,division names
drop Name
destring, replace
isid fips
save data/census/census_regions, replace


//////////////////////////
//WEATHER - ANNUAL BY STATE FROM NOAA
/*
forval year=1997(1)2020{
	copy https://ftp.cpc.ncep.noaa.gov/htdocs/degree_days/weighted/daily_data/`year'/StatesCONUS.Cooling.txt data/noaa/cool`year'.txt
	copy https://ftp.cpc.ncep.noaa.gov/htdocs/degree_days/weighted/daily_data/`year'/StatesCONUS.Heating.txt data/noaa/heat`year'.txt
}
*/
clear all
foreach type in Cool Heat{
forval year=1997(1)2020{ 
	import delimited using data/noaa/`type'`year'.txt, ///
		delimiter("|") rowrange(4) stringcols(_all) clear
	rename v1 state_abbr
	destring, replace
	egen `type'=rowtotal(v*)
	drop if state_abbr=="Region"
	drop v*
	gen year=`year'
	compress
	save data/noaa/`type'`year', replace
}
}			
foreach type in Cool Heat{
	clear all
	forval year=1997(1)2020{ 
		append using data/noaa/`type'`year'
	}
	isid state year
save data/noaa/`type', replace
}			
use data/noaa/cool, clear
merge 1:1 state year using data/noaa/heat
rename Cool cdd
rename Heat hdd
drop _m
expand 2 if state=="MD", gen(temp) //assume DC has Maryland's weather
	replace state="DC" if state=="MD" & temp==1
	isid state year
	drop temp
save data/noaa/weather, replace

sum cdd hdd if state=="MN"
sum cdd hdd if state=="TX"		




///////////////////////////
//MERGING ALL
use data/phmsa/annual_gas_distribution_1990_2019, clear
//year operator_name operator_id_phmsa operating_state
//use most recent name to match
gen temp1=operator_name if year==2019
	bysort operator_id_phmsa: egen temp2=mode(temp1)
	gen temp3=temp2
	drop temp1 temp2
	forval v=2018(-1)1990{
		gen temp1=operator_name if year==`v'
		bysort operator_id_phmsa: egen temp2=mode(temp1)
		replace temp3=temp2 if temp3==""
		drop temp1 temp2
	}
	drop operator_name
	rename temp3 operator_name
keep operator_name operator_id_phmsa state year
duplicates drop
isid state operator_id_phmsa year
replace operator_name=upper(operator_name)
replace operator_name=subinstr(operator_name,"."," ",.)
replace operator_name=subinstr(operator_name,","," ",.)
replace operator_name=subinstr(operator_name,"  "," ",.)
replace operator_name=subinstr(operator_name,"  "," ",.)
gen name=operator_name
//by hand updates:
	replace name="NICOR GAS" if operator_id==13710 & state=="IL"
	//And, in some years this is the name in PHMSA
	//No other string matches
	replace name="QUESTAR GAS COMPANY" if operator_id==12876 & state=="UT"
	//in some years this is the name in PHMSA
	//no other string matches
	replace name="BGE" if operator_id==1088 & state=="MD"
	//abbrevation for what's in PHMSA
	//no other string matches
	replace name="KANSAS GAS SERVICE COMPANY" if operator_id==31159 & state=="KS"
	//in some years this is the name in PHMSA
	//no other string matches
	replace name="PNM GAS SERVICES" if operator_id==6141 & state=="NM"
	//Public Service Company of New Mexico is a corporation that is wholly owned by PNM Resources.
	//https://en.wikipedia.org/wiki/PNM_Resources#Public_Service_Company_of_New_Mexico_(PNM)
	replace name="SAN ANTONIO PUB SVC BD" if operator_id==18104 & state=="TX"
	replace name="VECTREN OF OHIO" if operator_id==31470 & state=="OH"
	replace name="CITIZENS ENERGY GROUP" if operator_id==2392 & state=="IN"
	//different abbreviation
	//no other string matches
	replace name="NARRAGANSETT ELECTRIC CO GAS DIV RI" if operator_id==13480 & state=="RI"
	//different abbreviation
	//no other string matches
	//https://en.wikipedia.org/wiki/National_Grid_plc
	//National Grid United States operates The Narragansett Electric Company (in Rhode Island);
	replace name="MINNESOTA ENERGY RESOURCES" if operator_id==32198 & state=="MN"
	//in some years this is the name in PHMSA
	replace name="CENTRAL ILLINOIS LT CO" if operator_id==2200 & state=="IL"
	//in some years this is the name in PHMSA
	replace name="WISCONSIN POWER AND LIGHT COMPANY" if operator_id==22784 & state=="WI"
	//in some years this is the name in PHMSA
	replace name="SIERRA PACIFIC POWER COMPANY" if operator_id==18308 & state=="NV"
	//in some years this is the name in PHMSA
	//no other string matches
	replace name="CENTRAL ILLINOIS PUB SVC CO" if operator_id==2204 & state=="IL"
	//in some years this is the name in PHMSA
	//no other string matches
	replace name="BLACK HILLS GAS DISTRIBUTION LLC" if operator_id==15359 & state=="NE"
	//in some years this is the name in PHMSA
	//no other string matches
	replace name="PIEDMONT NATURAL GAS" if operator_id==15518 & state=="SC"
	//in some years this is the name in PHMSA
	//no other string matches
	replace name="COLUMBIA GAS DIST CO" if operator_id==2585 & state=="KY"
	//in some years this is the name in PHMSA
	//no other string matches
	replace name="LIBERTY UTILITIES DBA ENERGY NORTH N" if operator_id==16667 & state=="NH"
	//in some years this is the name in PHMSA
	//no other string matches
	replace name="BLACK HILLS GAS DISTRIBUTION LLC" if operator_id==15359 & state=="CO"
	//in some years this is the name in PHMSA
	//no other string matches
	replace name="UNITED CITIES GAS CO" if operator_id==20211 & state=="KS"
	//in some years this is the name in PHMSA
	//no other string matches
	replace name="FLORIDA PUBLIC UTILITIES" if operator_id==5330 & state=="FL"
	//in some years this is the name in PHMSA
	//no other string matches
	replace name="FORT HILL NAT GAS AUTH" if operator_id==5410 & state=="SC"
	//different abbreviation
	//no other string matches
	replace name="LAS CRUCES MUN GAS" if operator_id==11240 & state=="NM"
	//different abbreviation
	//no other string matches
	replace name="SOUTHEAST ALABAMA GAS DISTRICT" if operator_id==18456 & state=="AL"
	//different abbreviation
	//no other string matches
	replace name="INTERSTATE PWR CO" if operator_id==8170 & state=="IA"
	//different abbreviation
	//no other string matches
	replace name="DULUTH CITY OF" if operator_id==3590 & state=="MN"
	//different abbreviation
	//no other string matches
	replace name="NORTHERN UTILITIES INC" if operator_id==13800 & state=="NH"
	//different abbreviation
	//no other string matches
	replace name="QUESTAR GAS COMPANY" if operator_id==12876 & state=="WY"
	//in some years this is the name in PHMSA
	//no other string matches
	replace name="CITY OF MOSS POINT" if operator_id==12816 & state=="MS"
	//different abbreviation
	//no other string matches
	//SIZE?
	replace name="ARKANSAS WESTERN GAS CO" if operator_id==621 & state=="AR"
	//in some years this is the name in PHMSA
	//no other string matches
	replace name="UNION ELECTRIC COMPANY" if operator_id==20050 & state=="IL"
	//in some years this is the name in PHMSA
	//no other string matches
	replace name="LIBERTY UTILITIES-ST LAWRENCE GAS" if operator_id==18084 & state=="NY"
	//different abbrev
	//no other string matches
	replace name="FLORENCE NATURAL GAS DEPT CITY OF" if operator_id==5300 & state=="AL"
	//different abbrev
	//no other string matches
	replace name="KINDER MORGAN" if operator_id==10030 & state=="CO"
	//in some years this is the name in PHMSA
	//no other string matches
	//SIZE?
	replace name="OWATONNA PUB UTIL" if operator_id==14410 & state=="MN"
	//different abbrev
	//no other string matches
	replace name="SOUTHERN MISSOURI GAS COMPANY LP" if operator_id==30992 & state=="MO"
	//different abbrev
	//no other string matches
	replace name="DALTON UTILITIES" if operator_id==3090 & state=="GA"
	//different abbrev
	//no other string matches
	replace name="SOMERVILLE TOWN OF" if operator_id==18392 & state=="TN"
	//different abbrev
	//no other string matches
	replace name="CITIZENS COMMUNICATIONS SANTA CRUZ" if operator_id==2416 & state=="AZ"
	//different abbrev
	//no other string matches
	replace name="SYLACAUGA UTILITIES BOARD" if operator_id==18784 & state=="AL"
	//different abbrev
	//no other string matches
	replace name="PIVOTAL UTILITIES DBA ELKTON GAS" if operator_id==4376 & state=="MD"
	//different abbrev
	//no other string matches
	replace name="ENERGY WEST WYOMING" if operator_id==2537 & state=="WY"
	//in some years this is the name in PHMSA
	//no other string matches
	replace name="HUMPREYS COUNTY UTILITIES DIST" if operator_id==7655 & state=="TN"
	//different abbrev
	//no other string matches
	replace name="ST JOSEPH L P EMPIRE" if operator_id==18072 & state=="MO"
	//different abbrev
	//no other string matches
	//check EMPIR*
	replace name="WEST BATON ROUGE PARISH NAT GAS" if operator_id==22364 & state=="LA"
	//different abbrev
	//no other string matches
	replace name="EMW GAS ASSN" if operator_id==4005 & state=="NM"
	//different abbrev
	//no other string matches
	replace name="ALABAMA GAS CORP" if operator_id==180 & state=="AL"
	//in some years this is the name in PHMSA
	replace name="ATHENS GAS DEPT CITY OF" if operator_id==766 & state=="AL"
	//different abbrev
	//no other string matches
	replace name="RIVIERA UTILITIES" if operator_id==5340 & state=="AL"
	//different abbrev
	//no other string matches	
	replace name="CLARKE MOBILE COUNTIES GAS DISTRICT" if operator_id==2456 & state=="AL"
		//note this will match to two different eia ids, across two sets of years
	replace name="PEOPLE GAS SYS" if operator_id==15348 & state=="FL"
		//others with same name but sporadic years and small size
	replace name="INTERSTATE POWER AND LIGHT COMPANY" if operator_id==8170 & state=="IA"
	replace name="MORTON VILLAGE OF" if operator_id==31859 & state=="IL"
	replace name="MORTON VILLAGE OF" if operator_id==12810 & state=="IL"
		//two different ids, but different years so ok
	replace name="PEOPLES NAT GAS CO" if operator_id==15359 & state=="KS"
	replace name="CENTERPOINT ENERGY ENTEX" if operator_id==4499 & state=="LA"
	replace name="CENTERPOINT ENERGY ARKLA" if operator_id==603 & state=="LA"
	replace name="ATMOS ENERGY CORPORATION" if operator_id==11800 & state=="LA"
		//note this will match to two different eia ids, across two sets of years				
	replace name="HOUMA MUNICIPAL GAS SYSTEM" if operator_id==19205 & state=="LA"
		//website, I think			
	replace name="NORTHERN STATES PWR CO" if operator_id==31636 & state=="MN"
	replace name="NORTHERN STATES PWR CO" if operator_id==13782 & state=="MN"
	replace name="NORTHERN STATES PWR CO" if operator_id==13780 & state=="MN"
		//three different ids, but different years so ok		
	replace name="MERC PNG" if operator_id==32198 & state=="MN"
		//different abbreviation
	replace name="MINNESOTA ENERGY RESOURCES - NMU" if operator_id==13707 & state=="MN"
		//same name, basing off size	
	replace name="PIEDMONT NATURAL GAS" if operator_id==15518 & state=="NC"
		//will match to two EIA ids across different years; fine
	replace name="THE BROOKLYN UNION GAS CO" if operator_id==1800 & state=="NY"
	replace name="NATIONAL FUEL GAS DIST NY" if operator_id==13062 & state=="NY"
		//one other with similar name but only listing 17 customers
	replace name="EAST OHIO GAS COMP DOMINION EAST OH" if operator_id==4060 & state=="OH"
	replace name="EAST OHIO GAS COMP DOMINION EAST OH" if operator_id==30880 & state=="OH"
		//different ids but across different years; fine
	replace name="PEOPLES NATURAL GAS COMPANY" if operator_id==15350 & state=="PA"
		//will match to two EIA ids across different years; fine
	replace name="YORK CTY NATURAL GAS AUTHORITY" if operator_id==24030 & state=="SC"
		//different abbrev
	replace name="CENTERPOINT ENERGY ENTEX" if operator_id==4499 & state=="TX"
	replace name="CENTERPOINT ENERGY ARKLA" if operator_id==603 & state=="TX"	
	replace name="WASHINGTON GAS LIGHT COMPANY" if operator_id==22182 & state=="VA"	
		//will match to two EIA ids across different years; fine	
		//MAKE SURE IT MATCHES TO THE EIA ONE THAT HAS "AND"	
	replace name="WISCONSIN PUB SVC CORP" if operator_id==22791 & state=="WI"	
		//will match to two EIA ids across different years; fine	
	replace name="MOUNTAINEER GAS CO" if operator_id==12878 & state=="WV"	
gen match_by_hand=0
	replace match_by_hand=1 if name!=operator_name
egen id_phmsa=group(state operator_id_phmsa year)
drop operator_name
tostring year, gen(stateyear)
	replace stateyear=state+stateyear
	drop if length(stateyear)>6 //multiple states; won't match to eia. 247 observations, <1% of observations
	drop state year
save data/phmsa/phmsaid, replace


//bringing phmsa ids into actual 176 data
//below, i bring in the phmsa data - for now, just the ids
//make sure not to drop any eia observations
use data/eia176/eia176, clear
count //30,169
count if company_id_eia=="" //0
rename company_name name
replace name=upper(name)
replace name=subinstr(name,"."," ",.)
replace name=subinstr(name,","," ",.)
replace name=subinstr(name,"  "," ",.)
replace name=subinstr(name,"  "," ",.)
egen id_eia=group(state company_id_eia year)
tostring year, gen(stateyear)
	replace stateyear=state+stateyear
reclink name stateyear using data/phmsa/phmsaid, ///
	gen(match_quality) idm(id_eia) idu(id_phmsa) orblock(stateyear) 
tab _m
//1573 did not match
count //30400
drop _m
compare stateyear Ustateyear
	drop stateyear Ustateyear
rename name company_name_eia
duplicates tag id_eia, gen(dup)
	tab dup, m
	disp 29938+462/2 //30169 as above
	//dropping because multiple company_id_eia observations, and only keeping the correct ones
	drop if company_id_eia=="17600492MN" & strmatch(Uname,"*ARGYLE*")==0 & dup==1
	drop if company_id_eia=="17617084IL" & strmatch(Uname,"*FRANKLIN*")==0 & dup==1
	drop if company_id_eia=="17647093TN" & strmatch(Uname,"*ADAMSVILLE*")==0 & dup==1
	drop if company_id_eia=="17671665LA" & strmatch(Uname,"*ELIZABETH*")==0 & dup==1
	drop dup
	duplicates tag id_eia, gen(dup)
	tab dup, m
	disp 30019+300/2 //30169 as above
	foreach v in match_quality id_phmsa operator_id match_by_hand{
		replace `v'=. if dup!=0
	}
	foreach v in Uname{
		replace `v'="" if dup!=0
	}
	duplicates drop
	count //30169 as expected
	drop dup
duplicates tag id_phmsa, gen(dup)	
tab dup
	//false matches, that we can fix by hand to keep the good part of that utility's match:
	foreach v in match_quality id_phmsa operator_id match_by_hand{
		replace `v'=. if strmatch(Uname,"*WESTLAKE*")==1 & strmatch(company_name,"*WESTLAKE*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*TALLAHASSEE*")==1 & strmatch(company_name,"*TALLAHASSEE*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*WANN*")==1 & strmatch(company_name,"*WANN*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*WHITE CASTLE*")==1 & strmatch(company_name,"*WHITE CASTLE*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*FAIRFAX*")==1 & strmatch(company_name,"*FAIRFAX*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*ADAIRSVILLE*")==1 & strmatch(company_name,"*ADAIRSVILLE*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*RIVERTON*")==1 & strmatch(company_name,"*RIVERTON*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*PIKEVILLE*")==1 & strmatch(company_name,"*PIKEVILLE*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*LIVONIA*")==1 & strmatch(company_name,"*LIVONIA*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*FRANKLINTON*")==1 & strmatch(company_name,"*FRANKLINTON*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*BURBANK*")==1 & strmatch(company_name,"*BURBANK*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*JAY *")==1 & strmatch(company_name,"*JAY *")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*ADAMSVILLE*")==1 & strmatch(company_name,"*ADAMSVILLE*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*MARION*")==1 & strmatch(company_name,"*MARION*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*PARSONS*")==1 & strmatch(company_name,"*PARSONS*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*DEVINE*")==1 & strmatch(company_name,"*DEVINE*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*JOAQUIN*")==1 & strmatch(company_name,"*JOAQUIN*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*SOUTH ALABAMA*")==1 & strmatch(company_name,"*SOUTH ALABAMA*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*FLOMATON*")==1 & strmatch(company_name,"*FLOMATON*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*T VERNON*")==1 & strmatch(company_name,"*T VERNON*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*TRUSSVILLE*")==1 & strmatch(company_name,"*TRUSSVILLE*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*FLORALA*")==1 & strmatch(company_name,"*FLORALA*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*CARTERSVILLE*")==1 & strmatch(company_name,"*CARTERSVILLE*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*ELBERTON*")==1 & strmatch(company_name,"*ELBERTON*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*LAGRANGE*")==1 & strmatch(company_name,"*LAGRANGE*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*SYLVESTER*")==1 & strmatch(company_name,"*SYLVESTER*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*TRION*")==1 & strmatch(company_name,"*TRION*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*TIPTON*")==1 & strmatch(company_name,"*TIPTON*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*CLAY CITY*")==1 & strmatch(company_name,"*CLAY CITY*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*VIENNA*")==1 & strmatch(company_name,"*VIENNA*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*CONCORDIA*")==1 & strmatch(company_name,"*CONCORDIA*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*BURKESVILLE*")==1 & strmatch(company_name,"*BURKESVILLE*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*CLINTON*")==1 & strmatch(company_name,"*CLINTON*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*FULTON*")==1 & strmatch(company_name,"*FULTON*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*HAZARD*")==1 & strmatch(company_name,"*HAZARD*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*MORGANTOWN*")==1 & strmatch(company_name,"*MORGANTOWN*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*GONZALES*")==1 & strmatch(company_name,"*GONZALES*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*NEW ROADS*")==1 & strmatch(company_name,"*NEW ROADS*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*ST AMANT*")==1 & strmatch(company_name,"*ST AMANT*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*VIDALIA*")==1 & strmatch(company_name,"*VIDALIA*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*GRANT CITY*")==1 & strmatch(company_name,"*GRANT CITY*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*RALEIGH*")==1 & strmatch(company_name,"*RALEIGH*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*RIPLEY*")==1 & strmatch(company_name,"*RIPLEY*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*WALNUT GROVE*")==1 & strmatch(company_name,"*WALNUT GROVE*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*MONROE*")==1 & strmatch(company_name,"*MONROE*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*CHELSEA*")==1 & strmatch(company_name,"*CHELSEA*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*CHOUTEAU*")==1 & strmatch(company_name,"*CHOUTEAU*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*HASKELL*")==1 & strmatch(company_name,"*HASKELL*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*VICI P*")==1 & strmatch(company_name,"*VICI P*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*WALLY*")==1 & strmatch(company_name,"*WALLY*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*COOKEVILLE*")==1 & strmatch(company_name,"*COOKEVILLE*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*HENDERSON*")==1 & strmatch(company_name,"*HENDERSON*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*CHESTER*")==1 & strmatch(company_name,"*CHESTER*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*DILLEY*")==1 & strmatch(company_name,"*DILLEY*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*PINELAND*")==1 & strmatch(company_name,"*PINELAND*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*STOCKDALE*")==1 & strmatch(company_name,"*STOCKDALE*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*SUNDOWN*")==1 & strmatch(company_name,"*SUNDOWN*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*EPCOR GAS*")==1 & strmatch(company_name,"*EPCOR GAS*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*T & L*")==1 & strmatch(company_name,"*T AND L*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*EQUITABLE GAS CO*")==1 & strmatch(company_name,"*EQUITABLE GAS CO*")==0 & dup!=0
		replace `v'=. if strmatch(Uname,"*W G GAS*")==1 & strmatch(company_name,"*W G GAS*")==0 & dup!=0
	}	
	foreach v in Uname{
		replace `v'="" if strmatch(Uname,"*WESTLAKE*")==1 & strmatch(company_name,"*WESTLAKE*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*TALLAHASSEE*")==1 & strmatch(company_name,"*TALLAHASSEE*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*WANN*")==1 & strmatch(company_name,"*WANN*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*WHITE CASTLE*")==1 & strmatch(company_name,"*WHITE CASTLE*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*FAIRFAX*")==1 & strmatch(company_name,"*FAIRFAX*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*ADAIRSVILLE*")==1 & strmatch(company_name,"*ADAIRSVILLE*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*RIVERTON*")==1 & strmatch(company_name,"*RIVERTON*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*PIKEVILLE*")==1 & strmatch(company_name,"*PIKEVILLE*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*LIVONIA*")==1 & strmatch(company_name,"*LIVONIA*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*FRANKLINTON*")==1 & strmatch(company_name,"*FRANKLINTON*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*BURBANK*")==1 & strmatch(company_name,"*BURBANK*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*JAY *")==1 & strmatch(company_name,"*JAY *")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*ADAMSVILLE*")==1 & strmatch(company_name,"*ADAMSVILLE*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*MARION*")==1 & strmatch(company_name,"*MARION*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*PARSONS*")==1 & strmatch(company_name,"*PARSONS*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*DEVINE*")==1 & strmatch(company_name,"*DEVINE*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*JOAQUIN*")==1 & strmatch(company_name,"*JOAQUIN*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*SOUTH ALABAMA*")==1 & strmatch(company_name,"*SOUTH ALABAMA*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*FLOMATON*")==1 & strmatch(company_name,"*FLOMATON*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*T VERNON*")==1 & strmatch(company_name,"*T VERNON*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*TRUSSVILLE*")==1 & strmatch(company_name,"*TRUSSVILLE*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*FLORALA*")==1 & strmatch(company_name,"*FLORALA*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*CARTERSVILLE*")==1 & strmatch(company_name,"*CARTERSVILLE*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*ELBERTON*")==1 & strmatch(company_name,"*ELBERTON*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*LAGRANGE*")==1 & strmatch(company_name,"*LAGRANGE*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*SYLVESTER*")==1 & strmatch(company_name,"*SYLVESTER*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*TRION*")==1 & strmatch(company_name,"*TRION*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*TIPTON*")==1 & strmatch(company_name,"*TIPTON*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*CLAY CITY*")==1 & strmatch(company_name,"*CLAY CITY*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*VIENNA*")==1 & strmatch(company_name,"*VIENNA*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*CONCORDIA*")==1 & strmatch(company_name,"*CONCORDIA*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*BURKESVILLE*")==1 & strmatch(company_name,"*BURKESVILLE*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*CLINTON*")==1 & strmatch(company_name,"*CLINTON*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*FULTON*")==1 & strmatch(company_name,"*FULTON*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*HAZARD*")==1 & strmatch(company_name,"*HAZARD*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*MORGANTOWN*")==1 & strmatch(company_name,"*MORGANTOWN*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*GONZALES*")==1 & strmatch(company_name,"*GONZALES*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*NEW ROADS*")==1 & strmatch(company_name,"*NEW ROADS*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*ST AMANT*")==1 & strmatch(company_name,"*ST AMANT*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*VIDALIA*")==1 & strmatch(company_name,"*VIDALIA*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*GRANT CITY*")==1 & strmatch(company_name,"*GRANT CITY*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*RALEIGH*")==1 & strmatch(company_name,"*RALEIGH*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*RIPLEY*")==1 & strmatch(company_name,"*RIPLEY*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*WALNUT GROVE*")==1 & strmatch(company_name,"*WALNUT GROVE*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*MONROE*")==1 & strmatch(company_name,"*MONROE*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*CHELSEA*")==1 & strmatch(company_name,"*CHELSEA*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*CHOUTEAU*")==1 & strmatch(company_name,"*CHOUTEAU*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*HASKELL*")==1 & strmatch(company_name,"*HASKELL*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*VICI P*")==1 & strmatch(company_name,"*VICI P*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*WALLY*")==1 & strmatch(company_name,"*WALLY*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*COOKEVILLE*")==1 & strmatch(company_name,"*COOKEVILLE*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*HENDERSON*")==1 & strmatch(company_name,"*HENDERSON*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*CHESTER*")==1 & strmatch(company_name,"*CHESTER*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*DILLEY*")==1 & strmatch(company_name,"*DILLEY*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*PINELAND*")==1 & strmatch(company_name,"*PINELAND*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*STOCKDALE*")==1 & strmatch(company_name,"*STOCKDALE*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*SUNDOWN*")==1 & strmatch(company_name,"*SUNDOWN*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*EPCOR GAS*")==1 & strmatch(company_name,"*EPCOR GAS*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*T & L*")==1 & strmatch(company_name,"*T AND L*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*EQUITABLE GAS CO*")==1 & strmatch(company_name,"*EQUITABLE GAS CO*")==0 & dup!=0
		replace `v'="" if strmatch(Uname,"*W G GAS*")==1 & strmatch(company_name,"*W G GAS*")==0 & dup!=0
	}
drop dup

	duplicates tag id_phmsa, gen(dup)
	tab dup, m
	//false matches that we can't fix by hand, 
	//ie i've looked through these and don't see an obviously good fit:
	foreach v in match_quality id_phmsa operator_id match_by_hand{
		replace `v'=. if dup!=0
	}	
	foreach v in Uname{
		replace `v'="" if dup!=0
	}	
	drop dup
	duplicates tag id_phmsa, gen(dup)
	tab dup, m	
	drop dup
	replace match_q=1 if company_name_eia=="FLORALA GAS DEPARTMENT" & Uname=="FLORALA UTILITY BOARD CITY OF"
	replace match_q=1 if company_name_eia=="KEVIL CITY OF" & Uname=="KEVIL NATURAL GAS SYSTEM"
	replace match_q=1 if company_name_eia=="RICHMOND UTILITIES BOARD" & Uname=="RICHMOND NATURAL GAS & SEWAGE WKS"
	replace match_q=1 if company_name_eia=="VILLAGE OF ELIZABETH" & Uname=="ELIZABETH TOWN OF"
	replace match_q=1 if company_name_eia=="KENNETT BOARD OF PUBLIC WORKS" & Uname=="KENNETT MUNICIPAL GAS SYSTEM"
	replace match_q=1 if company_name_eia=="PONCA PUBLIC UTILITIES" & Uname=="PONCA GAS DEPT"
	replace match_q=1 if company_name_eia=="PIVOTAL UTILITY DBA ELIZABETHTOWN" & Uname=="ELIZABETHTOWN GAS CO"
	replace match_q=1 if company_name_eia=="CITY OF CLEVELAND" & Uname=="CLEVELAND GAS DEPT"
	replace match_q=1 if company_name_eia=="FAIRFAX TOWN OF" & Uname=="FAIRFAX PUBLIC WORKS AUTHORITY"
	replace match_q=1 if company_name_eia=="JONES TOWN OF" & Uname=="JONES CITY MUNICIPAL GAS SYSTEM"
	replace match_q=1 if company_name_eia=="SLICK TOWN OF" & Uname=="SLICK PUBLIC WORKS AUTH"
	replace match_q=1 if company_name_eia=="ADAMSVILLE CITY OF" & Uname=="TOWN OF ADAMSVILLE GAS DEPT"
	replace match_q=1 if company_name_eia=="SELMER PUB WKS" & Uname=="SELMER NATURAL GAS SYSTEM"
	replace match_q=1 if company_name_eia=="MGTC INC" & Uname=="M G T C INC"
	foreach v in match_quality id_phmsa operator_id match_by_hand{
		replace `v'=. if match_q<.8 | match_q==.
	}	
	foreach v in Uname{
		replace `v'="" if match_q<.8 | match_q==.
	}				
		
	foreach v in match_quality id_phmsa operator_id match_by_hand{
		replace `v'=. if company_name_eia=="MONROE UTILITIES CITY OF" & Uname=="TIFTON CITY OF"
		replace `v'=. if company_name_eia=="LEBO CITY OF" & Uname=="COATS CITY OF"
		replace `v'=. if company_name_eia=="B AND H GAS CO INC" & Uname=="BTU GAS COMPANY INC "
		replace `v'=. if company_name_eia=="RURAL GAS DIST 1" & Uname=="RURAL WATER SEWER GAS & SOLID WASTE MGMT DIST #10 OF DELAWARE CTY OK"
		replace `v'=. if company_name_eia=="BRAZORIA CITY OF" & Uname=="BRADY MUNICIPAL GAS CORP CITY OF"
		replace `v'=. if company_name_eia=="PLAINS CITY OF" & Uname=="PORT ARANSAS CITY OF"
		replace `v'=. if company_name_eia=="VINA TOWN OF NATURAL GAS" & Uname=="RED BAY NATURAL GAS SYSTEM TOWN OF"	
		replace `v'=. if company_name_eia=="FLORALA GAS DEPARTMENT" & Uname=="SHEFFIELD GAS DEPARTMENT"	
		replace `v'=. if company_name_eia=="ARKANSAS OKLAHOMA GAS CORPORATION" & Uname=="ARKANSAS WESTERN GAS CO"	
		replace `v'=. if company_name_eia=="ARKANSAS OKLAHOMA GAS CORPORATION" & Uname=="ARKANSAS WESTERN GAS COMPANY - NORTHEAST REGION"	
		replace `v'=. if company_name_eia=="FORT PIERCE UTIL AUTH" & Uname=="FORT WALTON BEACH HOUSING AUTH"	
		replace `v'=. if company_name_eia=="FLORIDA PUBLIC UTILITIES INDIANTOWN" & Uname=="FLORIDA ROCK INDUSTRIES INC"	
		replace `v'=. if company_name_eia=="EQUALITY MUNICIPAL GAS" & Uname=="KARNAK MUNICIPAL GAS UTILITY"	
		replace `v'=. if company_name_eia=="BETHANY VILLAGE OF" & Uname=="RANTOUL VILLAGE OF"	
		replace `v'=. if company_name_eia=="SHARON CITY OF" & Uname=="HUGOTON CITY OF"	
		replace `v'=. if company_name_eia=="TEXAS KANSAS OKLAHOMA GAS LLC" & Uname=="CMF OF KANSAS LLC"	
		replace `v'=. if company_name_eia=="CITY OF PARTRIDGE" & Uname=="CITY OF CONCORDIA"	
		replace `v'=. if company_name_eia=="RICHARDSVILLE GAS CO INC" & Uname=="BURKESVILLE GAS CO INC"	
		replace `v'=. if company_name_eia=="VALLEY GAS INC" & Uname=="BELFRY GAS INC"	
		replace `v'=. if company_name_eia=="ORBIT GAS TRANSMISSION INC" & Uname=="COLUMBIA GAS TRANSMISSION LLC"	
		replace `v'=. if company_name_eia=="COLVIN GAS CO" & Uname=="COLUMBUS HOBBS GAS CO"	
		replace `v'=. if company_name_eia=="GAS UTIL DIST 1 EBR PARISH" & Uname=="POINTE COUPEE PARISH GAS UTIL DIST # 1 OF"	
		replace `v'=. if company_name_eia=="LOUISIANA GAS SERVICE CO" & Uname=="TRANS LOUISIANA GAS CO INC"	
		replace `v'=. if company_name_eia=="NSTAR GAS COMPANY" & Uname=="NEW ENGLAND GAS COMPANY"	
		replace `v'=. if company_name_eia=="BAGLEY PUBLIC UTILITIES" & Uname=="WESTBROOK PUBLIC UTILITIES"	
		replace `v'=. if company_name_eia=="GREAT PLAINS NATURAL GAS CO" & Uname=="NORTHWEST NATURAL GAS CO"	
		replace `v'=. if company_name_eia=="MISSOURI GAS ENERGY" & Uname=="TORO ENERGY OF MISSOURI LLC"	
		replace `v'=. if company_name_eia=="MISSOURI GAS UTILITY" & Uname=="MISSOURI JOINT MUNICIPAL ELECTRIC UTILITY COMMISSION"	
		replace `v'=. if company_name_eia=="CENTRAL CITY GAS DEPT" & Uname=="CENTRAL FARMER COOPERATIVE"	
		replace `v'=. if company_name_eia=="VILLAGE OF VERONA" & Uname=="THE VILLAGE OF OBETZ"	
		replace `v'=. if company_name_eia=="WEST TEXAS GAS INC" & Uname=="WESTERN RESOURCES INC (AKA KPL CO OR GAS SERVICE)"	
		replace `v'=. if company_name_eia=="MANNFORD PUB WORKS AUTHORITY" & Uname=="ORLANDO PUBLIC WORKS AUTH"	
		replace `v'=. if company_name_eia=="VICI PUB WORK AUTH" & Uname=="SLICK PUBLIC WORKS AUTH"	
		replace `v'=. if company_name_eia=="ENABLE GATHERING AND PROCESSING" & Uname=="TIMBERLAND GATHERING & PROCESSING CO INC"	
		replace `v'=. if company_name_eia=="CITY OF CLEVELAND" & Uname=="CITY OF HOOKER"	
		replace `v'=. if company_name_eia=="GII GAS COMPANY" & Uname=="BLUEFLAME GAS COMPANY"	
		replace `v'=. if company_name_eia=="LARKIN OIL AND GAS CO" & Uname=="HERMAN OIL & GAS CO INC"	
		replace `v'=. if company_name_eia=="NATIONAL FUEL GAS DIST PA" & Uname=="ORWELL NATURAL GAS PA DIVISION"	
		replace `v'=. if company_name_eia=="WALKER GAS & OIL COMPANY INC" & Uname=="WALLY GAS CO INC"	
		replace `v'=. if company_name_eia=="NORTHWESTERN ENERGY" & Uname=="NORTHERN STATES POWER CO OF MINNESOTA"	
		replace `v'=. if company_name_eia=="MARIETTA CITY OF" & Uname=="GARY CITY OF"	
		replace `v'=. if company_name_eia=="GOLDSMITH CITY OF" & Uname=="GARY CITY OF"	
		replace `v'=. if company_name_eia=="THE NATURAL GAS COMPANY LLC" & Uname=="NORTHERN NATURAL GAS CO"	
		replace `v'=. if company_name_eia=="TEXAS WESTERN MUN GAS" & Uname=="TEXAS GAS DISTRIBUTORS INC"	
		replace `v'=. if company_name_eia=="APPLEBY NATURAL GAS" & Uname=="ZIA NATURAL GAS CO"	
		replace `v'=. if company_name_eia=="TRANSWESTERN PIPELINE COMPANY" & Uname=="OKTEX PIPELINE COMPANY LLC"	
		replace `v'=. if company_name_eia=="OASIS PIPE LINE COMPANY TEXAS LP" & Uname=="GATEWAY PIPELINE COMPANY"	
		replace `v'=. if company_name_eia=="HOPE GAS INC" & Uname=="THOMPSON GAS INC"	
		replace `v'=. if company_name_eia=="BLACKSVILLE OIL GAS COMPANY" & Uname=="BAZZLE GAS COMPANY"	
	}	
	foreach v in Uname{
		replace `v'="" if company_name_eia=="MONROE UTILITIES CITY OF" & Uname=="TIFTON CITY OF"
		replace `v'="" if company_name_eia=="LEBO CITY OF" & Uname=="COATS CITY OF"
		replace `v'="" if company_name_eia=="B AND H GAS CO INC" & Uname=="BTU GAS COMPANY INC "
		replace `v'="" if company_name_eia=="RURAL GAS DIST 1" & Uname=="RURAL WATER SEWER GAS & SOLID WASTE MGMT DIST #10 OF DELAWARE CTY OK"
		replace `v'="" if company_name_eia=="BRAZORIA CITY OF" & Uname=="BRADY MUNICIPAL GAS CORP CITY OF"
		replace `v'="" if company_name_eia=="PLAINS CITY OF" & Uname=="PORT ARANSAS CITY OF"
		replace `v'="" if company_name_eia=="VINA TOWN OF NATURAL GAS" & Uname=="RED BAY NATURAL GAS SYSTEM TOWN OF"	
		replace `v'="" if company_name_eia=="FLORALA GAS DEPARTMENT" & Uname=="SHEFFIELD GAS DEPARTMENT"	
		replace `v'="" if company_name_eia=="ARKANSAS OKLAHOMA GAS CORPORATION" & Uname=="ARKANSAS WESTERN GAS CO"	
		replace `v'="" if company_name_eia=="ARKANSAS OKLAHOMA GAS CORPORATION" & Uname=="ARKANSAS WESTERN GAS COMPANY - NORTHEAST REGION"	
		replace `v'="" if company_name_eia=="FORT PIERCE UTIL AUTH" & Uname=="FORT WALTON BEACH HOUSING AUTH"	
		replace `v'="" if company_name_eia=="FLORIDA PUBLIC UTILITIES INDIANTOWN" & Uname=="FLORIDA ROCK INDUSTRIES INC"	
		replace `v'="" if company_name_eia=="EQUALITY MUNICIPAL GAS" & Uname=="KARNAK MUNICIPAL GAS UTILITY"	
		replace `v'="" if company_name_eia=="BETHANY VILLAGE OF" & Uname=="RANTOUL VILLAGE OF"	
		replace `v'="" if company_name_eia=="SHARON CITY OF" & Uname=="HUGOTON CITY OF"	
		replace `v'="" if company_name_eia=="TEXAS KANSAS OKLAHOMA GAS LLC" & Uname=="CMF OF KANSAS LLC"	
		replace `v'="" if company_name_eia=="CITY OF PARTRIDGE" & Uname=="CITY OF CONCORDIA"	
		replace `v'="" if company_name_eia=="RICHARDSVILLE GAS CO INC" & Uname=="BURKESVILLE GAS CO INC"	
		replace `v'="" if company_name_eia=="VALLEY GAS INC" & Uname=="BELFRY GAS INC"	
		replace `v'="" if company_name_eia=="ORBIT GAS TRANSMISSION INC" & Uname=="COLUMBIA GAS TRANSMISSION LLC"	
		replace `v'="" if company_name_eia=="COLVIN GAS CO" & Uname=="COLUMBUS HOBBS GAS CO"	
		replace `v'="" if company_name_eia=="GAS UTIL DIST 1 EBR PARISH" & Uname=="POINTE COUPEE PARISH GAS UTIL DIST # 1 OF"	
		replace `v'="" if company_name_eia=="LOUISIANA GAS SERVICE CO" & Uname=="TRANS LOUISIANA GAS CO INC"	
		replace `v'="" if company_name_eia=="NSTAR GAS COMPANY" & Uname=="NEW ENGLAND GAS COMPANY"	
		replace `v'="" if company_name_eia=="BAGLEY PUBLIC UTILITIES" & Uname=="WESTBROOK PUBLIC UTILITIES"	
		replace `v'="" if company_name_eia=="GREAT PLAINS NATURAL GAS CO" & Uname=="NORTHWEST NATURAL GAS CO"	
		replace `v'="" if company_name_eia=="MISSOURI GAS ENERGY" & Uname=="TORO ENERGY OF MISSOURI LLC"	
		replace `v'="" if company_name_eia=="MISSOURI GAS UTILITY" & Uname=="MISSOURI JOINT MUNICIPAL ELECTRIC UTILITY COMMISSION"	
		replace `v'="" if company_name_eia=="CENTRAL CITY GAS DEPT" & Uname=="CENTRAL FARMER COOPERATIVE"	
		replace `v'="" if company_name_eia=="VILLAGE OF VERONA" & Uname=="THE VILLAGE OF OBETZ"	
		replace `v'="" if company_name_eia=="WEST TEXAS GAS INC" & Uname=="WESTERN RESOURCES INC (AKA KPL CO OR GAS SERVICE)"	
		replace `v'="" if company_name_eia=="MANNFORD PUB WORKS AUTHORITY" & Uname=="ORLANDO PUBLIC WORKS AUTH"	
		replace `v'="" if company_name_eia=="VICI PUB WORK AUTH" & Uname=="SLICK PUBLIC WORKS AUTH"	
		replace `v'="" if company_name_eia=="ENABLE GATHERING AND PROCESSING" & Uname=="TIMBERLAND GATHERING & PROCESSING CO INC"	
		replace `v'="" if company_name_eia=="CITY OF CLEVELAND" & Uname=="CITY OF HOOKER"	
		replace `v'="" if company_name_eia=="GII GAS COMPANY" & Uname=="BLUEFLAME GAS COMPANY"	
		replace `v'="" if company_name_eia=="LARKIN OIL AND GAS CO" & Uname=="HERMAN OIL & GAS CO INC"	
		replace `v'="" if company_name_eia=="NATIONAL FUEL GAS DIST PA" & Uname=="ORWELL NATURAL GAS PA DIVISION"	
		replace `v'="" if company_name_eia=="WALKER GAS & OIL COMPANY INC" & Uname=="WALLY GAS CO INC"	
		replace `v'="" if company_name_eia=="NORTHWESTERN ENERGY" & Uname=="NORTHERN STATES POWER CO OF MINNESOTA"	
		replace `v'="" if company_name_eia=="MARIETTA CITY OF" & Uname=="GARY CITY OF"	
		replace `v'="" if company_name_eia=="GOLDSMITH CITY OF" & Uname=="GARY CITY OF"	
		replace `v'="" if company_name_eia=="THE NATURAL GAS COMPANY LLC" & Uname=="NORTHERN NATURAL GAS CO"	
		replace `v'="" if company_name_eia=="TEXAS WESTERN MUN GAS" & Uname=="TEXAS GAS DISTRIBUTORS INC"	
		replace `v'="" if company_name_eia=="APPLEBY NATURAL GAS" & Uname=="ZIA NATURAL GAS CO"	
		replace `v'="" if company_name_eia=="TRANSWESTERN PIPELINE COMPANY" & Uname=="OKTEX PIPELINE COMPANY LLC"	
		replace `v'="" if company_name_eia=="OASIS PIPE LINE COMPANY TEXAS LP" & Uname=="GATEWAY PIPELINE COMPANY"	
		replace `v'="" if company_name_eia=="HOPE GAS INC" & Uname=="THOMPSON GAS INC"	
		replace `v'="" if company_name_eia=="BLACKSVILLE OIL GAS COMPANY" & Uname=="BAZZLE GAS COMPANY"	
	}	
//br *name* if match_q<.95 & match_q!=.	
//br if match_q>=.95 & match_q<.97 //spotcheck looks good 
drop id_eia id_phmsa Uname
//checks i will do:
//regression if match_q==1
//regression if match_by_hand!=1
count //30169

gen temp=operator_id_phmsa
	replace operator_id_phmsa=9999*100000+[_n] if operator_id_phmsa==.
	merge 1:1 operator_id_phmsa state year using data/phmsa/annual_gas_distribution_1990_2019, ///
		keep(master match) nogen
	replace operator_id_phmsa=temp //this allows for unique matching
	drop temp
	count if operator_id_phmsa!=. & resi_total_cust!=. & resi_total_cust!=0
	local temp1=r(N)
	count if resi_total_cust!=. & resi_total_cust!=0
	local temp2=r(N)
	disp `temp1'/`temp2' //83% match right now
	sum resi_total_cust if operator_id_phmsa!=.
	local temp1=r(mean)*r(N)
	sum resi_total_cust if operator_id_phmsa==.
	local temp2=r(mean)*r(N)
	disp `temp1'/(`temp1'+`temp2') //87% match right now.
	sum resi_sales_cust if operator_id_phmsa!=.
	local temp1=r(mean)*r(N)
	sum resi_sales_cust if operator_id_phmsa==.
	local temp2=r(mean)*r(N)
	disp `temp1'/(`temp1'+`temp2') //86% match right now.
	
merge m:1 state year using data/eia/citygate, keep(master match) nogen
merge m:1 year using data/cpi/cpi, keep(master match) nogen

rename state state_abbr
merge m:1 state_abbr year using data/noaa/weather, keep(master match) nogen //no weather: ak, hi
merge m:1 state_abbr using data/fips/state_fips, keep(master match) nogen //dropping american samoa, guam, etc
merge m:1 fips using data/census/census_regions, keep(master match) nogen
	
compress
save data/legacy_utility_data, replace



///////////////////////////////////////
//CONSTRUCTING VARIABLES FOR REGRESSIONS
use data/legacy_utility_data, clear
encode company_id_eia, gen(id)
xtset id year

drop if resi_total_cust==0 //only applies to 4 observations
	//in data cleaning file, we drop if customers==0 & volume==0
	//not clear why these four observations report volume but not customers
	
//zeros vs missing:
//trans revenue and price not ever observed before 2008, so zeros are not true zeroes:
//(almost all zero or blank; 1 observation each that's non-zero)
foreach v in resi_trans_price resi_trans_revenue ///
	comm_trans_price comm_trans_revenue ///
	indu_trans_price indu_trans_revenue {
	replace `v'=. if year<2008
}
//zero prices are never true:
foreach v in resi_sales_price resi_trans_price ///
	comm_sales_price comm_trans_price ///
	indu_sales_price indu_trans_price {
	replace `v'=. if `v'==0
}
//if the volume is non-zero, revenue should not be zero because zero prices are never true:
foreach v in resi_sales resi_trans ///
	comm_sales comm_trans ///
	indu_sales indu_trans {
	replace `v'_revenue=. if `v'_revenue==0 & `v'_volume!=0 & `v'_volume!=.
}
//blank volumes should be zero if total volumes are reported and equal to the other type:
foreach v in resi comm indu {
	replace `v'_sales_volume=0 if `v'_sales_volume==. & `v'_total_volume!=. & `v'_total_volume==`v'_trans_volume
	replace `v'_trans_volume=0 if `v'_trans_volume==. & `v'_total_volume!=. & `v'_total_volume==`v'_sales_volume
}
//blank revenues should be zero if volumes are zero:
foreach v in resi_sales resi_trans ///
	comm_sales comm_trans ///
	indu_sales indu_trans {
	replace `v'_revenue=0 if `v'_revenue==. & `v'_volume==0 
}
//blank customers of one type should be zero if total customers = customers of the other type:
foreach v in resi comm indu {
	replace `v'_sales_cust=0 if `v'_sales_cust==. & `v'_trans_cust==`v'_total_cust
	replace `v'_trans_cust=0 if `v'_trans_cust==. & `v'_sales_cust==`v'_total_cust
}
//blank customers should be zero if total customers are not blank: (this applies to 0 resi obs, after the above)
	//for commercial, all of these have comm_total_cust==0
foreach v in resi comm indu {
	replace `v'_sales_cust=0 if `v'_sales_cust==. & `v'_total_cust!=. 
	replace `v'_trans_cust=0 if `v'_trans_cust==. & `v'_total_cust!=.	
}

//real vs nominal
foreach v in citygate citygate_alt_simple citygate_alt_wtall citygate_alt_wtresi citygate_alt_wtcomm {
	gen `v'_real=`v'_nominal/cpi*$cpi2019
}
foreach v in resi_sales_price resi_trans_price resi_sales_revenue resi_trans_revenue ///
	comm_sales_price comm_trans_price comm_sales_revenue comm_trans_revenue ///
	indu_sales_price indu_trans_price indu_sales_revenue indu_trans_revenue {
	rename `v' `v'_nominal
	gen `v'_real=`v'_nominal/cpi*$cpi2019
}

//net revenue
//transport revenue is not reported in some years
/*primary net revenue measure: all utilities and all years, 
	assuming same mark-up for transport and for sales customers, 
	but only if mostly sales customers (90 percent)*/
/*alternative net revenue measure: all utilities and all years, 
	assuming same mark-up for transport and for sales customers, 
	but only if mostly sales customers (other percents)
	this does not necessitate new variable creation*/	
/*alternative net revenue measure: as reported, so unbalanced panel*/
/*alternative: as reported for no retail choice (does not require new variable)*/
/*alternative: as reported for yes retail choice, limited years (does not require new variable)*/
foreach v in resi comm indu{
	gen `v'_nr1_real = `v'_total_volume * (`v'_sales_price_real - citygate_real)
	//note will be blank if *no* sales customers - because nothing to use for our assumed price
	gen `v'_cust1 = `v'_total_cust
	gen `v'_nrpc1_real = `v'_nr1_real / `v'_cust1
	gen `v'_q1 = `v'_total_volume
	gen `v'_qpc1 = `v'_total_volume / `v'_cust1
	gen `v'_nrpq1_real = `v'_nr1_real / `v'_q1
}
foreach v in resi comm indu{
	gen `v'_nr2_real = `v'_sales_volume * (`v'_sales_price_real - citygate_real) ///
		if (`v'_trans_cust==0 | `v'_trans_cust==.)
		replace `v'_nr2_real = `v'_sales_volume * (`v'_sales_price_real - citygate_real) ///
			+ `v'_trans_revenue_real if `v'_trans_revenue_real!=.
	gen `v'_cust2 = `v'_sales_cust if (`v'_trans_cust==0 | `v'_trans_cust==.)
		replace `v'_cust2 = `v'_total_cust if `v'_trans_cust!=.
	gen `v'_nrpc2_real = `v'_nr2_real / `v'_cust2
	gen `v'_q2 = `v'_sales_volume if (`v'_trans_cust==0 | `v'_trans_cust==.)
		replace `v'_q2 = `v'_total_volume if `v'_trans_volume!=.
	gen `v'_qpc2 = `v'_total_volume / `v'_cust2
	gen `v'_nrpq2_real = `v'_nr2_real / `v'_q2
}
	sum resi_nr1 resi_nr2 //version 2 has more missing values, as expected
	corr resi_nr1 resi_nr2 //highly correlated, as expected
	tab year if resi_nr1!=. & resi_nr2==. //around 50 utilities per year, 1997-2007, as expected
	sum resi_cust1 if resi_nr1!=.
		local temp1=r(mean)*r(N)
	sum resi_cust2 if resi_nr2!=.
		local temp2=r(mean)*r(N)	
	disp `temp2'/`temp1' //78 percent of customers
	sum resi_cust1 if resi_nr1!=.
		local temp1=r(N)
	sum resi_cust2 if resi_nr2!=.
		local temp2=r(N)	
	disp `temp2'/`temp1' //98 percent of observations
	sum resi_cust1 if resi_nr1!=. & year<=2007
		local temp1=r(mean)*r(N)
	sum resi_cust2 if resi_nr2!=. & year<=2007
		local temp2=r(mean)*r(N)	
	disp `temp2'/`temp1' //51 percent of customers
	sum resi_cust1 if resi_nr1!=. & year>2007
		local temp1=r(mean)*r(N)
	sum resi_cust2 if resi_nr2!=. & year>2007
		local temp2=r(mean)*r(N)	
	disp `temp2'/`temp1' //100 percent of customers
	sum resi_cust1 if resi_nr1!=. & year<=2007
		local temp1=r(N)
	sum resi_cust2 if resi_nr2!=. & year<=2007
		local temp2=r(N)	
	disp `temp2'/`temp1' //96 percent of observations
	sum resi_cust1 if resi_nr1!=. & year>2007
		local temp1=r(N)
	sum resi_cust2 if resi_nr2!=. & year>2007
		local temp2=r(N)	
	disp `temp2'/`temp1' //100 percent of observations
	
gen num_srvs=num_srvs_total
	replace num_srvs=num_srvs_total_version1 ///
		if num_srvs_total_version1==num_srvs_total_version2 & num_srvs_total==.
	tab year if num_srvs==.
	drop num_srvs_* //not keeping e.g. by plastic vs steel
gen mmiles=mmiles_total
	replace mmiles=mmiles_total_version1a if mmiles_total==. //different versions are nearly identical
	tab year if mmiles==.
	drop mmiles_*

//logs - okay to drop zeroes because not conceptually part of main sample
foreach s in resi comm indu {
foreach v in nr1_real cust1 nrpc1_real q1 qpc1 nrpq1_real ///
			 nr2_real cust2 nrpc2_real q2 qpc2 nrpq2_real {
	gen ln`s'_`v'=ln(`s'_`v')
}
}
gen lnnum_srvs=ln(num_srvs)
gen lnmmiles=ln(mmiles)

//asymmetry, levels and log levels
foreach s in resi comm indu {
foreach v in 1 2 {
	xtset id year
	gen `s'_dummy_grow_`v'=1 if `s'_cust`v'>l.`s'_cust`v'
		replace `s'_dummy_grow_`v'=0 if `s'_dummy_grow_`v'==.
		replace `s'_dummy_grow_`v'=. if `s'_cust`v'==. | l.`s'_cust`v'==.
	gen `s'_grow`v'=`s'_cust`v' * `s'_dummy_grow_`v'
	gen `s'_shrink`v'=`s'_cust`v' * (1-`s'_dummy_grow_`v')
}
}
foreach s in resi comm indu {
foreach v in grow1 shrink1 grow2 shrink2{
	gen ln`s'_`v'=ln(`s'_`v') //these are logs of one year variables, not logs of differences, so this is ok
	replace ln`s'_`v'=0 if `s'_`v'==0 //ie when not growing or not shrinking
}
}

xtset id year
//asymmetry, log differences
foreach s in resi comm indu {
foreach v in 1 2 {
	xtset id year
		//plain id, because we will drop jumps manually
	gen dln`s'_nr`v'=d.ln`s'_nr`v'
	gen dln`s'_nrpc`v'=d.ln`s'_nrpc`v'
	gen dln`s'_grow`v'=d.ln`s'_cust`v'
		replace dln`s'_grow`v'=0 if d.ln`s'_cust`v'<0 & d.ln`s'_cust`v'!=.
	gen dln`s'_shrink`v'=d.ln`s'_cust`v'
		replace dln`s'_shrink`v'=0 if d.ln`s'_cust`v'>0 & d.ln`s'_cust`v'!=.
}
}
foreach v in lnmmiles {
	gen d`v'=d.`v'
}

//regime-specific effects, but just two slope coefficients.
//accounting for possible service territory changes
xtset id year
	gen idtemp=[_n] if abs(s.lnresi_cust1)>=.2 | abs(s.lncomm_cust1)>=.5
	replace idtemp=[_n] if s.lnresi_cust1==. | s.lncomm_cust1==.
		//new marker for first year, after missings, or when customer counts change by a lot
		//browsing the data, it looks like "after missings" is fairly infrequent
	replace idtemp=idtemp[_n-1] if idtemp==.
	egen idnojumps=group(id idtemp)
	drop idtemp
foreach s in resi comm {
foreach v in 1 {
	xtset idnojumps year
	gen `s'temp1=1 if idnojumps!=l.idnojumps
		//new marker for first year, after missings, or when customer counts change by a lot
	forval year=1998(1)2019{
	replace `s'temp1=l.`s'temp1 if (idnojumps==l.idnojumps) ///
		& (`s'_dummy_grow_`v'==l.`s'_dummy_grow_`v') & year==`year'
		//same marker if still growing or stil shrinking
	replace `s'temp1=l.`s'temp1+1 if (idnojumps==l.idnojumps) ///
		& (`s'_dummy_grow_`v'!=l.`s'_dummy_grow_`v') & year==`year'	
		//new marker if regime flips
	}	
	//drop temp1
}
}
gen tempcount=1 
egen fe_regime=group(idnojumps resitemp1 commtemp1)
	//new regime if either residential or commercial flips sign
	egen resi_fe_regime=group(idnojumps resitemp1)
	egen comm_fe_regime=group(idnojumps commtemp1)
bysort fe_regime: egen regime_length=sum(tempcount)
	bysort resi_fe_regime: egen resi_regime_length=sum(tempcount)
	bysort comm_fe_regime: egen comm_regime_length=sum(tempcount)
bysort fe_regime: gen regime_place=[_n]
	bysort resi_fe_regime: gen resi_regime_place=[_n]
	bysort comm_fe_regime: gen comm_regime_place=[_n]
drop tempcount resitemp1 commtemp1

gen portion_resi_sales=resi_sales_cust/resi_total_cust
	bysort id: egen min_portion_resi_sales=min(portion_resi_sales)
	//ever falls below threshold eg 90%
gen portion_comm_sales=comm_sales_cust/comm_total_cust
	bysort id: egen min_portion_comm_sales=min(portion_comm_sales)
	
foreach s in resi comm indu {
foreach v in nr1_real cust1 nrpc1_real q1 qpc1 nrpq1_real ///
	nr2_real cust2 nrpc2_real q2 qpc2 nrpq2_real ///
	grow1 shrink1 grow2 shrink2 {
	format `s'_`v' %9.0f
	format ln`s'_`v' %9.2f
}
}
foreach s in resi comm indu {
foreach v in 1 2 {
	format dln`s'_nr`v' %9.2f
	format dln`s'_grow`v' %9.2f
	format dln`s'_shrink`v' %9.2f
}
}
foreach v in num_srvs mmiles {
	format `v' %9.0f
	format ln`v' %9.2f
}
foreach s in resi comm indu {
foreach v in dummy_grow_1 dummy_grow_2{
	format `s'_`v' %9.2f		 
}
}
foreach v in portion_resi_sales portion_comm_sales iou muni {
	format `v' %9.2f
}

compress
save data/legacy_utility_data_controls, replace










